{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(ggplot2)\n",
    "library(ComplexUpset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare the datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<caption>A data.frame: 3 × 24</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>title</th><th scope=col>year</th><th scope=col>length</th><th scope=col>budget</th><th scope=col>rating</th><th scope=col>votes</th><th scope=col>r1</th><th scope=col>r2</th><th scope=col>r3</th><th scope=col>r4</th><th scope=col>⋯</th><th scope=col>r9</th><th scope=col>r10</th><th scope=col>mpaa</th><th scope=col>Action</th><th scope=col>Animation</th><th scope=col>Comedy</th><th scope=col>Drama</th><th scope=col>Documentary</th><th scope=col>Romance</th><th scope=col>Short</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>⋯</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>$                     </td><td>1971</td><td>121</td><td>NA</td><td>6.4</td><td>348</td><td>4.5</td><td> 4.5</td><td>4.5</td><td> 4.5</td><td>⋯</td><td> 4.5</td><td> 4.5</td><td></td><td>0</td><td>0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>$1000 a Touchdown     </td><td>1939</td><td> 71</td><td>NA</td><td>6.0</td><td> 20</td><td>0.0</td><td>14.5</td><td>4.5</td><td>24.5</td><td>⋯</td><td> 4.5</td><td>14.5</td><td></td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>$21 a Day Once a Month</td><td>1941</td><td>  7</td><td>NA</td><td>8.2</td><td>  5</td><td>0.0</td><td> 0.0</td><td>0.0</td><td> 0.0</td><td>⋯</td><td>24.5</td><td>24.5</td><td></td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 3 × 24\n",
       "\\begin{tabular}{r|lllllllllllllllllllll}\n",
       "  & title & year & length & budget & rating & votes & r1 & r2 & r3 & r4 & ⋯ & r9 & r10 & mpaa & Action & Animation & Comedy & Drama & Documentary & Romance & Short\\\\\n",
       "  & <chr> & <int> & <int> & <int> & <dbl> & <int> & <dbl> & <dbl> & <dbl> & <dbl> & ⋯ & <dbl> & <dbl> & <chr> & <int> & <int> & <int> & <int> & <int> & <int> & <int>\\\\\n",
       "\\hline\n",
       "\t1 & \\$                      & 1971 & 121 & NA & 6.4 & 348 & 4.5 &  4.5 & 4.5 &  4.5 & ⋯ &  4.5 &  4.5 &  & 0 & 0 & 1 & 1 & 0 & 0 & 0\\\\\n",
       "\t2 & \\$1000 a Touchdown      & 1939 &  71 & NA & 6.0 &  20 & 0.0 & 14.5 & 4.5 & 24.5 & ⋯ &  4.5 & 14.5 &  & 0 & 0 & 1 & 0 & 0 & 0 & 0\\\\\n",
       "\t3 & \\$21 a Day Once a Month & 1941 &   7 & NA & 8.2 &   5 & 0.0 &  0.0 & 0.0 &  0.0 & ⋯ & 24.5 & 24.5 &  & 0 & 1 & 0 & 0 & 0 & 0 & 1\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 3 × 24\n",
       "\n",
       "| <!--/--> | title &lt;chr&gt; | year &lt;int&gt; | length &lt;int&gt; | budget &lt;int&gt; | rating &lt;dbl&gt; | votes &lt;int&gt; | r1 &lt;dbl&gt; | r2 &lt;dbl&gt; | r3 &lt;dbl&gt; | r4 &lt;dbl&gt; | ⋯ ⋯ | r9 &lt;dbl&gt; | r10 &lt;dbl&gt; | mpaa &lt;chr&gt; | Action &lt;int&gt; | Animation &lt;int&gt; | Comedy &lt;int&gt; | Drama &lt;int&gt; | Documentary &lt;int&gt; | Romance &lt;int&gt; | Short &lt;int&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | $                      | 1971 | 121 | NA | 6.4 | 348 | 4.5 |  4.5 | 4.5 |  4.5 | ⋯ |  4.5 |  4.5 | <!----> | 0 | 0 | 1 | 1 | 0 | 0 | 0 |\n",
       "| 2 | $1000 a Touchdown      | 1939 |  71 | NA | 6.0 |  20 | 0.0 | 14.5 | 4.5 | 24.5 | ⋯ |  4.5 | 14.5 | <!----> | 0 | 0 | 1 | 0 | 0 | 0 | 0 |\n",
       "| 3 | $21 a Day Once a Month | 1941 |   7 | NA | 8.2 |   5 | 0.0 |  0.0 | 0.0 |  0.0 | ⋯ | 24.5 | 24.5 | <!----> | 0 | 1 | 0 | 0 | 0 | 0 | 1 |\n",
       "\n"
      ],
      "text/plain": [
       "  title                  year length budget rating votes r1  r2   r3  r4   ⋯\n",
       "1 $                      1971 121    NA     6.4    348   4.5  4.5 4.5  4.5 ⋯\n",
       "2 $1000 a Touchdown      1939  71    NA     6.0     20   0.0 14.5 4.5 24.5 ⋯\n",
       "3 $21 a Day Once a Month 1941   7    NA     8.2      5   0.0  0.0 0.0  0.0 ⋯\n",
       "  r9   r10  mpaa Action Animation Comedy Drama Documentary Romance Short\n",
       "1  4.5  4.5      0      0         1      1     0           0       0    \n",
       "2  4.5 14.5      0      0         1      0     0           0       0    \n",
       "3 24.5 24.5      0      1         0      0     0           0       1    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "movies = as.data.frame(ggplot2movies::movies)\n",
    "head(movies, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".list-inline {list-style: none; margin:0; padding: 0}\n",
       ".list-inline>li {display: inline-block}\n",
       ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
       "</style>\n",
       "<ol class=list-inline><li>'Action'</li><li>'Animation'</li><li>'Comedy'</li><li>'Drama'</li><li>'Documentary'</li><li>'Romance'</li><li>'Short'</li></ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'Action'\n",
       "\\item 'Animation'\n",
       "\\item 'Comedy'\n",
       "\\item 'Drama'\n",
       "\\item 'Documentary'\n",
       "\\item 'Romance'\n",
       "\\item 'Short'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'Action'\n",
       "2. 'Animation'\n",
       "3. 'Comedy'\n",
       "4. 'Drama'\n",
       "5. 'Documentary'\n",
       "6. 'Romance'\n",
       "7. 'Short'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"Action\"      \"Animation\"   \"Comedy\"      \"Drama\"       \"Documentary\"\n",
       "[6] \"Romance\"     \"Short\"      "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "genres = colnames(movies)[18:24]\n",
    "genres"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert the genre indicator columns to use boolean values:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<caption>A matrix: 7 × 3 of type lgl</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>1</th><th scope=col>2</th><th scope=col>3</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>Action</th><td>FALSE</td><td>FALSE</td><td>FALSE</td></tr>\n",
       "\t<tr><th scope=row>Animation</th><td>FALSE</td><td>FALSE</td><td> TRUE</td></tr>\n",
       "\t<tr><th scope=row>Comedy</th><td> TRUE</td><td> TRUE</td><td>FALSE</td></tr>\n",
       "\t<tr><th scope=row>Drama</th><td> TRUE</td><td>FALSE</td><td>FALSE</td></tr>\n",
       "\t<tr><th scope=row>Documentary</th><td>FALSE</td><td>FALSE</td><td>FALSE</td></tr>\n",
       "\t<tr><th scope=row>Romance</th><td>FALSE</td><td>FALSE</td><td>FALSE</td></tr>\n",
       "\t<tr><th scope=row>Short</th><td>FALSE</td><td>FALSE</td><td> TRUE</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A matrix: 7 × 3 of type lgl\n",
       "\\begin{tabular}{r|lll}\n",
       "  & 1 & 2 & 3\\\\\n",
       "\\hline\n",
       "\tAction & FALSE & FALSE & FALSE\\\\\n",
       "\tAnimation & FALSE & FALSE &  TRUE\\\\\n",
       "\tComedy &  TRUE &  TRUE & FALSE\\\\\n",
       "\tDrama &  TRUE & FALSE & FALSE\\\\\n",
       "\tDocumentary & FALSE & FALSE & FALSE\\\\\n",
       "\tRomance & FALSE & FALSE & FALSE\\\\\n",
       "\tShort & FALSE & FALSE &  TRUE\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A matrix: 7 × 3 of type lgl\n",
       "\n",
       "| <!--/--> | 1 | 2 | 3 |\n",
       "|---|---|---|---|\n",
       "| Action | FALSE | FALSE | FALSE |\n",
       "| Animation | FALSE | FALSE |  TRUE |\n",
       "| Comedy |  TRUE |  TRUE | FALSE |\n",
       "| Drama |  TRUE | FALSE | FALSE |\n",
       "| Documentary | FALSE | FALSE | FALSE |\n",
       "| Romance | FALSE | FALSE | FALSE |\n",
       "| Short | FALSE | FALSE |  TRUE |\n",
       "\n"
      ],
      "text/plain": [
       "            1     2     3    \n",
       "Action      FALSE FALSE FALSE\n",
       "Animation   FALSE FALSE  TRUE\n",
       "Comedy       TRUE  TRUE FALSE\n",
       "Drama        TRUE FALSE FALSE\n",
       "Documentary FALSE FALSE FALSE\n",
       "Romance     FALSE FALSE FALSE\n",
       "Short       FALSE FALSE  TRUE"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "movies[genres] = movies[genres] == 1\n",
    "t(head(movies[genres], 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To keep the examples fast to compile we will operate on a subset of the movies with complete data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "movies[movies$mpaa == '', 'mpaa'] = NA\n",
    "movies = na.omit(movies)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Utility for changing output parameters in Jupyter notebooks ([IRKernel kernel](https://github.com/IRkernel/IRkernel)), not relevant if using RStudio or scripting R from terminal:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_size = function(w, h, factor=1.5) {\n",
    "    s = 1 * factor\n",
    "    options(\n",
    "        repr.plot.width=w * s,\n",
    "        repr.plot.height=h * s,\n",
    "        repr.plot.res=100 / factor,\n",
    "        jupyter.plot_mimetypes='image/png',\n",
    "        jupyter.plot_scale=1\n",
    "    )\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.0.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
